import os

from langchain import hub
from langchain.indexes import vectorstore
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

# SECURITY(review): a DashScope API key is committed in this file. It is kept
# here only as a fallback so existing runs keep working -- rotate it and supply
# DASHSCOPE_API_KEY through the environment instead. setdefault() (rather than
# plain assignment) lets a key already present in the environment win.
os.environ.setdefault("DASHSCOPE_API_KEY", "sk-10920ad0a9d542af96353edd7ab3e613")

from langchain_community.llms import Tongyi

# Step 1: load the Markdown file into LangChain documents.
from langchain_community.document_loaders import UnstructuredMarkdownLoader
markdown_path = r"C:\Users\Administrator\Desktop\README.md"
loader = UnstructuredMarkdownLoader(markdown_path)

data = loader.load()

# Step 2: split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) so each piece can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(data)
from langchain_community.embeddings import DashScopeEmbeddings

# Step 3: create the embedding client. The key is read back from the
# environment so it is defined in exactly one place.
embeddings = DashScopeEmbeddings(
    model="text-embedding-v1",
    dashscope_api_key=os.environ["DASHSCOPE_API_KEY"],
)

# Step 4: embed the chunks and index them in a Chroma vector store.
# NOTE: this rebinds the name `vectorstore`, which the top-of-file imports
# also bind to the `langchain.indexes.vectorstore` module; from here on it
# refers to the Chroma instance.
vectorstore: Chroma = Chroma.from_documents(documents=splits, embedding=embeddings)

# Step 5: take the question and look up the nearest stored chunks.

# Retrieve and generate using the relevant snippets of the indexed document.
# The keyword must be spelled `search_kwargs`; the previous `seach_kwargs`
# spelling is not the parameter that carries search settings, so the requested
# k=4 was never actually handed to the retriever as a search argument.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# Pull the shared RAG prompt template from the LangChain hub and print it so
# the template in use is visible in the script output.
prompt = hub.pull("rlm/rag-prompt")
print(prompt)

# Tongyi is the LLM used for answer generation.
llm = Tongyi()


def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (two newline characters). An empty iterable gives ``""``.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Inputs for the prompt: "context" is the retrieved documents rendered to text
# by format_docs, and "question" is the caller's input passed through as-is.
_chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Full pipeline: build prompt inputs -> fill the prompt -> call the LLM ->
# parse the model output with StrOutputParser.
rag_chain = _chain_inputs | prompt | llm | StrOutputParser()

# Run the pipeline on a single question and print the answer.
result = rag_chain.invoke("请问该平台有哪些内置功能")
print(result)